# Diffusion Models
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from scipy.stats import kstest
from IPython.display import HTML
# --- Simulation settings ---
np.random.seed(42)   # fixed seed: every run draws the same samples
n_samples = 10000    # number of scalar data points
T = 200              # number of diffusion steps
beta_start = 1e-4    # noise level at the first step
beta_end = 0.1       # noise level at the last step

# --- Starting distribution: three separated uniform bands ---
# The first two bands each receive a third of the points (integer division);
# the last band absorbs the remainder so the sizes always sum to n_samples.
n1 = n2 = n_samples // 3
n3 = n_samples - (n1 + n2)
band_specs = ((-10, -6, n1), (-2, 2, n2), (6, 10, n3))
initial_data = np.concatenate(
    [np.random.uniform(low, high, size=count) for low, high, count in band_specs]
)

# --- Noise schedule: beta rises linearly from beta_start to beta_end ---
beta = np.linspace(beta_start, beta_end, T)
alpha = 1.0 - beta
bar_alpha = alpha.cumprod()  # running product of alpha over the steps
# === Forward diffusion: build the whole trajectory up front ===
# diffusion_steps[k] holds the samples after k noising steps; index 0 is a
# copy of the untouched data, so the list ends up with T + 1 arrays.
diffusion_steps = [initial_data.copy()]
for step in range(T):
    current = diffusion_steps[-1]
    eps = np.random.randn(n_samples)  # fresh standard-normal noise per step
    # Shrink the current samples by sqrt(alpha) and add sqrt(beta)-scaled noise.
    diffusion_steps.append(
        np.sqrt(alpha[step]) * current + np.sqrt(beta[step]) * eps
    )
# === Normality check: one Kolmogorov-Smirnov test per diffusion step ===
# Each step's samples are compared against a normal distribution whose mean
# and standard deviation are taken from those same samples; only the p-value
# of each test is kept, in step order.
normality_p = [
    kstest(samples, 'norm', args=(samples.mean(), samples.std())).pvalue
    for samples in diffusion_steps
]
# === Histogram discretisation and reference curve ===
# 60 equally spaced edges over [-12, 12] define 59 histogram bins; the
# centres are the midpoints of neighbouring edges.
bins = np.linspace(-12, 12, 60)
bin_centers = (bins[:-1] + bins[1:]) / 2
# Dense grid on which the standard normal density is evaluated.
x_grid = np.linspace(-12, 12, 500)
gauss_pdf = np.exp(-np.square(x_grid) / 2) / np.sqrt(2 * np.pi)
# === Figure scaffolding and the artists shown in the first frame ===
fig, ax = plt.subplots(figsize=(8, 5))
ax.set(xlim=(-12, 12), ylim=(0, 0.45), xlabel="Value", ylabel="Density")
ax.grid(True)

# Bars start out as the density histogram of the undiffused data.
bin_width = bins[1] - bins[0]
hist_vals = np.histogram(initial_data, bins=bins, density=True)[0]
bars = ax.bar(bin_centers, hist_vals, width=bin_width, alpha=0.6, color='orange')

# Dashed reference curve: the standard normal density on x_grid.
(line_pdf,) = ax.plot(x_grid, gauss_pdf, 'r--', lw=2, label='Standard Gaussian')
ax.legend(loc='upper right')

# Both captions sit just above the axes (axes-fraction coordinates, y > 1).
caption_style = dict(transform=ax.transAxes, ha="center", va="bottom")
# Upper caption: the update rule, never changed after creation.
equation_text = ax.text(
    0.5, 1.08,
    r"$x_t = \sqrt{1-\beta_t}\,x_{t-1} + \sqrt{\beta_t}\,\epsilon,\quad \epsilon\sim\mathcal{N}(0,I)$",
    fontsize=12, **caption_style
)
# Lower caption: starts empty and is filled in for every animation frame.
subtitle_text = ax.text(0.5, 1.02, "", fontsize=10, **caption_style)
# === Per-frame callback handed to FuncAnimation ===
def update(frame):
    """Redraw the histogram and subtitle for diffusion step ``frame``.

    Re-bins ``diffusion_steps[frame]`` on the fixed ``bins`` edges, writes the
    resulting densities into the existing bar artists, and rewrites the
    subtitle with the step index and that step's stored KS p-value.

    Returns a tuple holding every bar followed by the subtitle text artist.
    """
    densities = np.histogram(diffusion_steps[frame], bins=bins, density=True)[0]
    for rect, height in zip(bars, densities):
        rect.set_height(height)
    p_value = normality_p[frame]
    subtitle_text.set_text(
        f"Step {frame}/{T} | KS p-value = {p_value:.3f}; close to 1 → Gaussian; close to 0 → Not Gaussian"
    )
    return tuple(bars) + (subtitle_text,)
# === Build the animation and render it inline ===
# One frame per entry of diffusion_steps, 50 ms between frames.
n_frames = len(diffusion_steps)
ani = FuncAnimation(fig, update, frames=n_frames, interval=50, blit=True)
# Close the figure so the notebook does not also show a static plot.
plt.close(fig)
# Last expression of the cell: the JavaScript/HTML player for the animation.
HTML(ani.to_jshtml())
import numpy as np
import matplotlib.pyplot as plt
# === Simulation settings ===
np.random.seed(42)   # fixed seed for reproducible draws
n_samples = 10000    # number of data points
T = 200              # number of diffusion steps
beta_start = 1e-4    # starting noise level
beta_end = 0.1       # ending noise level

# === Starting distribution: three uniform bands separated by gaps ===
# Two bands take a third of the points each; the third takes what is left.
n1 = n2 = n_samples // 3
n3 = n_samples - (n1 + n2)
initial_data = np.concatenate([
    np.random.uniform(low, high, size=count)
    for low, high, count in [(-10, -6, n1), (-2, 2, n2), (6, 10, n3)]
])

# === Beta schedules to compare (all span T steps) ===
# Linear: evenly spaced betas.  Quadratic: evenly spaced in sqrt(beta), then
# squared.  Constant: beta_end at every step.
sqrt_start, sqrt_end = np.sqrt(beta_start), np.sqrt(beta_end)
schedules = dict(
    Linear=np.linspace(beta_start, beta_end, T),
    Quadratic=np.linspace(sqrt_start, sqrt_end, T) ** 2,
    Constant=np.full(T, beta_end),
)

# === Steps at which the distribution is plotted: start, midpoint, end ===
timesteps = [0, T // 2, T]
# === Histogram bin edges: 60 evenly spaced edges over [-12, 12] ===
bins = np.linspace(-12, 12, 60)
# === Standard normal density on a dense grid, used as an overlay ===
x_grid = np.linspace(-12, 12, 500)
gauss_pdf = np.exp(-np.square(x_grid) / 2) / np.sqrt(2 * np.pi)
# === Plot colours (chosen to match the screenshot style) ===
bar_color, edge_color, gauss_color = "#F3B762", "#6D4301", "r"
# === Subplot grid: one row per schedule; columns = beta curve + one per timestep ===
# The y-axis is deliberately NOT shared between panels: column 0 plots beta
# values while the remaining columns plot densities, so the panels need
# independent y-scales and their own tick labels.  On a shared axis, a
# set_ylim call on any single panel is applied to every panel of the grid,
# which would clip the taller histograms and the Gaussian overlay.
# squeeze=False keeps `axes` two-dimensional even for a single schedule, so
# axes[row, col] indexing always works.
fig, axes = plt.subplots(
    nrows=len(schedules),
    ncols=len(timesteps) + 1,
    figsize=(15, 8),
    sharey=False,
    squeeze=False,
)
# === Leftmost column: the beta curve of each schedule ===
step_numbers = range(1, T + 1)   # steps are numbered 1..T on the x-axis
bottom_row = len(schedules) - 1
for row, (label, betas) in enumerate(schedules.items()):
    beta_ax = axes[row, 0]
    beta_ax.plot(step_numbers, betas, color="C0")
    beta_ax.set_xlim(0, T)
    beta_ax.set_title(f"{label}\nBeta Schedule")
    beta_ax.set_ylabel("Beta Value")
    # Only the bottom panel carries the x-axis label.
    if row == bottom_row:
        beta_ax.set_xlabel("Diffusion Step")
# === Histogram panels: one row per schedule, one column per snapshot step ===
bottom_row = len(schedules) - 1
for row, (label, betas) in enumerate(schedules.items()):
    # Run the forward noising chain under this schedule, keeping every state
    # so that the snapshot steps can be looked up by index afterwards.
    states = [initial_data.copy()]
    for step in range(T):
        current = states[-1]
        eps = np.random.randn(n_samples)
        states.append(
            np.sqrt(1 - betas[step]) * current + np.sqrt(betas[step]) * eps
        )

    # Column 0 holds the beta curve, so histogram columns start at 1.
    for col, snapshot in enumerate(timesteps, start=1):
        panel = axes[row, col]
        panel.hist(states[snapshot], bins=bins, density=True,
                   color=bar_color, edgecolor=edge_color, alpha=0.85)
        # Only the final step gets the standard-normal reference curve.
        if snapshot == T:
            panel.plot(x_grid, gauss_pdf, gauss_color + "--", lw=2, label="Std Gaussian")
            panel.legend()
        panel.set_xlim(-12, 12)
        panel.set_title(f"{label}\nstep {snapshot}")
        if col == 1:
            panel.set_ylabel("Density")
        if row == bottom_row:
            panel.set_xlabel("Value")
        # Pin the y-range of the first histogram panel (first schedule, first
        # snapshot).  NOTE: if the subplot grid was created with a shared
        # y-axis, this limit is applied to every panel, not just this one.
        if row == 0 and col == 1:
            panel.set_ylim(0, 0.2)
# Figure-level title placed slightly above the top edge, then tighten the
# spacing between panels and display the result.
fig.suptitle("Effect of Beta Schedule on Forward Diffusion", y=1.03, fontsize=16)
fig.tight_layout()
plt.show()